use prioritizr as an introduction to systemic conservation planning
Note: this lab was created by Jeffrey O. Hanson, the developer of the R package prioritizr
if (!require("librarian")){
install.packages("librarian")
library(librarian)
}
librarian::shelf(
assertthat,
BiocManager,
dplyr,
gridExtra,
here,
mapview,
prioritizr,
prioritizrdata,
raster,
remotes,
rgeos,
rgdal,
scales,
sf,
sp,
stringr,
units)
if (!require("lpsymphony")){
BiocManager::install("lpsymphony")
library(lpsymphony)
}
to download the data, please click on this link and save the data.zip file to your computer.
unzip the data.zip file, which contains pu.shp
dir_data <- here("data/prioritizr")
pu_shp <- file.path(dir_data, "pu.shp")
pu_url <- "https://github.com/prioritizr/massey-workshop/raw/main/data.zip"
pu_zip <- file.path(dir_data, basename(pu_url))
vegetation_tif <- file.path(dir_data, "vegetation.tif")
dir.create(dir_data, showWarnings = F, recursive = T)
if (!file.exists(pu_shp)){
download.file(pu_url, pu_zip)
unzip(pu_zip, exdir = dir_data)
dir_unzip <- file.path(dir_data, "data")
files_unzip <- list.files(dir_unzip, full.names = T)
file.rename(
files_unzip,
files_unzip %>% str_replace("prioritizr/data", "prioritizr"))
unlink(c(pu_zip, dir_unzip), recursive = T)
}
# import planning unit data
pu_data <- as(read_sf(pu_shp), "Spatial")
# format columns in planning unit data
pu_data$locked_in <- as.logical(pu_data$locked_in)
pu_data$locked_out <- as.logical(pu_data$locked_out)
# import vegetation data
veg_data <- stack(vegetation_tif)
The planning unit data contains spatial data describing the geometry for each planning unit and attribute data with information about each planning unit (e.g. cost values). The attribute data contains 5 columns with contain the following information:
id: unique identifiers for each planning unitcost: acquisition cost values for each planning unit (millions of Australian dollars).status: status information for each planning unit (only relevant with Marxan)locked_in: logical values (i.e. TRUE/FALSE) indicating if planning units are covered by protected areas or not.locked_out: logical values (i.e. TRUE/FALSE) indicating if planning units cannot be managed as a protected area because they contain are too degraded.# print a short summary of the data
print(pu_data)
## class : SpatialPolygonsDataFrame
## features : 516
## extent : 348703.2, 611932.4, 5167775, 5344516 (xmin, xmax, ymin, ymax)
## crs : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
## variables : 5
## names : id, cost, status, locked_in, locked_out
## min values : 557, 3.59717531470679, 0, 0, 0
## max values : 1130, 47.238336402701, 2, 1, 1
# plot the planning unit data
plot(pu_data)
# plot an interactive map of the planning unit data
mapview(pu_data)
# print the structure of object
str(pu_data, max.level = 2)
## Formal class 'SpatialPolygonsDataFrame' [package "sp"] with 5 slots
## ..@ data :'data.frame': 516 obs. of 5 variables:
## ..@ polygons :List of 516
## ..@ plotOrder : int [1:516] 69 104 1 122 157 190 4 221 17 140 ...
## ..@ bbox : num [1:2, 1:2] 348703 5167775 611932 5344516
## .. ..- attr(*, "dimnames")=List of 2
## ..@ proj4string:Formal class 'CRS' [package "sp"] with 1 slot
# print the class of the object
class(pu_data)
## [1] "SpatialPolygonsDataFrame"
## attr(,"package")
## [1] "sp"
# print the slots of the object
slotNames(pu_data)
## [1] "data" "polygons" "plotOrder" "bbox" "proj4string"
# print the coordinate reference system
print(pu_data@proj4string)
## CRS arguments:
## +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
# print number of planning units (geometries) in the data
row <- nrow(pu_data)
# print the first six rows in the data
head(pu_data@data)
## id cost status locked_in locked_out
## 1 557 29.74225 0 FALSE FALSE
## 2 558 29.87703 0 FALSE FALSE
## 3 574 28.60687 0 FALSE FALSE
## 4 575 30.83416 0 FALSE FALSE
## 5 576 38.75511 0 FALSE FALSE
## 6 577 38.11618 2 TRUE FALSE
# print the first six values in the cost column of the attribute data
head(pu_data$cost)
## [1] 29.74225 29.87703 28.60687 30.83416 38.75511 38.11618
# print the highest cost value
max_cost <- max(pu_data$cost)
# print the smallest cost value
min(pu_data$cost)
## [1] 3.597175
# print average cost value
mean(pu_data$cost)
## [1] 26.87393
# plot a map of the planning unit cost data
spplot(pu_data, "cost")
# plot an interactive map of the planning unit cost data
mapview(pu_data, zcol = "cost")
There are 516 planning units.
The highest cost value is 47.2383364.
use `plot` to make a map)?
Based on the map, it seems like the eastern side of Tasmania has lower cost cells ranging from approximately 5 - 15. The middle and northern sections of the area of interest have the highest cost values hovering around 45. And the western side of Tasmania has middling cost values hoving around 30.
# print a short summary of the data
print(veg_data)
## class : RasterStack
## dimensions : 164, 326, 53464, 32 (nrow, ncol, ncell, nlayers)
## resolution : 967, 1020 (x, y)
## extent : 298636.7, 613878.7, 5167756, 5335036 (xmin, xmax, ymin, ymax)
## crs : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
## names : vegetation.1, vegetation.2, vegetation.3, vegetation.4, vegetation.5, vegetation.6, vegetation.7, vegetation.8, vegetation.9, vegetation.10, vegetation.11, vegetation.12, vegetation.13, vegetation.14, vegetation.15, ...
## min values : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## max values : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
# plot a map of the 20th vegetation class
plot(veg_data[[20]])
# plot an interactive map of the 20th vegetation class
mapview(veg_data[[20]])
# print number of rows in the data
nrow(veg_data)
## [1] 164
# print number of columns in the data
ncol(veg_data)
## [1] 326
# print number of cells in the data
ncell(veg_data)
## [1] 53464
# print number of layers in the data
nlayers(veg_data)
## [1] 32
# print resolution on the x-axis
xres(veg_data)
## [1] 967
# print resolution on the y-axis
yres(veg_data)
## [1] 1020
# print spatial extent of the grid, i.e. coordinates for corners
extent(veg_data)
## class : Extent
## xmin : 298636.7
## xmax : 613878.7
## ymin : 5167756
## ymax : 5335036
# print the coordinate reference system
print(veg_data@crs)
## CRS arguments:
## +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
# print a summary of the first layer in the stack
print(veg_data[[1]])
## class : RasterLayer
## band : 1 (of 32 bands)
## dimensions : 164, 326, 53464 (nrow, ncol, ncell)
## resolution : 967, 1020 (x, y)
## extent : 298636.7, 613878.7, 5167756, 5335036 (xmin, xmax, ymin, ymax)
## crs : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
## source : vegetation.tif
## names : vegetation.1
## values : 0, 1 (min, max)
# print the value in the 800th cell in the first layer of the stack
print(veg_data[[1]][800])
##
## 0
# print the value of the cell located in the 30th row and the 60th column of
# the first layer
print(veg_data[[1]][30, 60])
##
## 0
# calculate the sum of all the cell values in the first layer
cellStats(veg_data[[1]], "sum")
## [1] 17
# calculate the maximum value of all the cell values in the first layer
cellStats(veg_data[[1]], "max")
## [1] 1
# calculate the minimum value of all the cell values in the first layer
cellStats(veg_data[[1]], "min")
## [1] 0
# calculate the mean value of all the cell values in the first layer
cellStats(veg_data[[1]], "mean")
## [1] 0.00035239
mapview(veg_data[[13]])
Most of the 13th vegetation class data are found in the eastern part of the study area.
total_cells <- ncell(veg_data)
twelfth_cells <- cellStats(veg_data[[12]], "sum")
twelfth_proportion <- twelfth_cells / total_cells
twelfth_percent <- round(twelfth_proportion * 100, digits = 2)
Approximately 1.53% of the cells contain the 12th vegetation class.
Which vegetation class is the most abundant (i.e. present in the greatest number of cells)?
We need to understand how existing protected areas conserve native vegetation in Tasmania, Australia before we can prioritize areas for establishing protected areas
we will perform a “gap analysis” to assess how each biodiversity feature is represented by existing protected areas
we can compare the current representation to a target threshold of how much we want each feature represented
calculate how much of each vegetation feature occurs inside each planning unit (aka the abundance of the features)
use problem() to create an empty conservation planning problem p0 that only contains the planning unit and biodiversity data
use feature_abundances() to calculate the total amount of each feature in each planning unit
# create prioritizr problem with only the PU data + vegetation data
p0 <- problem(pu_data, veg_data, cost_column = "cost")
# print empty problem,
# we can see that only the cost and feature data are defined
print(p0)
# calculate amount of each feature in each planning unit
abundance_data <- feature_abundances(p0)
# print abundance data
print(abundance_data)
## # A tibble: 32 × 3
## feature absolute_abundance relative_abundance
## <chr> <dbl> <dbl>
## 1 vegetation.1 16.0 1
## 2 vegetation.2 14.3 1
## 3 vegetation.3 10.4 1
## 4 vegetation.4 17.8 1
## 5 vegetation.5 13.0 1
## 6 vegetation.6 14.3 1
## 7 vegetation.7 20.0 1
## 8 vegetation.8 14.0 1
## 9 vegetation.9 18.0 1
## 10 vegetation.10 20.0 1
## # … with 22 more rows
The abundance_data object contains three columns
the feature column contains the name of each feature (derived from names(veg_data))
the absolute_abundance column contains the total amount of each feature in all the planning units
the relative_abundance column contains the total amount of each feature in the planning units expressed as a proportion of the total amount in the underlying raster data.
since all the raster cells containing vegetation overlap with the planning units, all of the values in the relative_abundance column are equal to one (meaning 100%)
add a new column with the feature abundances expressed in area units (i.e. km2).
# add new column with feature abundances in km^2
abundance_data$absolute_abundance_km2 <-
(abundance_data$absolute_abundance * prod(res(veg_data))) %>%
set_units(m^2) %>%
set_units(km^2)
# print abundance data
print(abundance_data)
## # A tibble: 32 × 4
## feature absolute_abundance relative_abundance absolute_abundance_km2
## <chr> <dbl> <dbl> [km^2]
## 1 vegetation.1 16.0 1 15.8
## 2 vegetation.2 14.3 1 14.1
## 3 vegetation.3 10.4 1 10.2
## 4 vegetation.4 17.8 1 17.6
## 5 vegetation.5 13.0 1 12.8
## 6 vegetation.6 14.3 1 14.1
## 7 vegetation.7 20.0 1 19.7
## 8 vegetation.8 14.0 1 13.9
## 9 vegetation.9 18.0 1 17.8
## 10 vegetation.10 20.0 1 19.7
## # … with 22 more rows
# calculate the average abundance of the features
mean(abundance_data$absolute_abundance_km2)
## 86.82948 [km^2]
# plot histogram of the features' abundances
hist(abundance_data$absolute_abundance_km2, main = "Feature abundances")
# find the abundance of the feature with the largest abundance
max(abundance_data$absolute_abundance_km2)
## 737.982 [km^2]
# find the name of the feature with the largest abundance
abundance_data$feature[which.max(abundance_data$absolute_abundance_km2)]
## [1] "vegetation.12"
median)?median <- median(abundance_data$absolute_abundance_km2)
The median abundance of the features is 19.1165038783561 km2.
small <- abundance_data$feature[which.min(abundance_data$absolute_abundance_km2)]
The name of the feature with the smallest abundance is vegetation.3.
sum(abundance_data$absolute_abundance_km2 > set_units(threshold, km^2) with the correct threshold value)?threshold = 100
total_abundance <- sum(abundance_data$absolute_abundance_km2 > set_units(threshold, km^2))
There are 6 features with a total abundance greater than 100 km2.
calculate the amount of each feature in the planning units that are covered by protected areas (i.e. feature representation by protected areas)
use eval_feature_representation_summary(), which requires:
a conservation problem object with the planning unit and biodiversity data
an object representing a solution to the problem (i.e an object in the same format as the planning unit data with values indicating if the planning units are selected or not)
# create column in planning unit data with binary values (zeros and ones)
# indicating if a planning unit is covered by protected areas or not
pu_data$pa_status <- as.numeric(pu_data$locked_in)
# calculate feature representation by protected areas
repr_data <- eval_feature_representation_summary(p0, pu_data[, "pa_status"])
# print feature representation data
print(repr_data)
## # A tibble: 32 × 5
## summary feature total_amount absolute_held relative_held
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 overall vegetation.1 16.0 0 0
## 2 overall vegetation.2 14.3 0 0
## 3 overall vegetation.3 10.4 0 0
## 4 overall vegetation.4 17.8 0 0
## 5 overall vegetation.5 13.0 0 0
## 6 overall vegetation.6 14.3 0 0
## 7 overall vegetation.7 20.0 0 0
## 8 overall vegetation.8 14.0 0 0
## 9 overall vegetation.9 18.0 0.846 0.0470
## 10 overall vegetation.10 20.0 0 0
## # … with 22 more rows
The repr_data object contains three columns
the feature column contains the name of each feature
the absolute_held column shows the total amount of each feature held in the solution (i.e. the planning units covered by protected areas)
and the relative_held column shows the proportion of each feature held in the solution (i.e. the proportion of each feature’s spatial distribution held in protected areas).
the absolute_held values correspond to the number of grid cells in the veg_data object with overlap with protected areas
convert them to area units (i.e. km2) so we can report them.
# add new column with the areas represented in km^2
repr_data$absolute_held_km2 <-
(repr_data$absolute_held * prod(res(veg_data))) %>%
set_units(m^2) %>%
set_units(km^2)
# print representation data
print(repr_data)
## # A tibble: 32 × 6
## summary feature total_amount absolute_held relative_held absolute_held_k…
## <chr> <chr> <dbl> <dbl> <dbl> [km^2]
## 1 overall vegetation.1 16.0 0 0 0
## 2 overall vegetation.2 14.3 0 0 0
## 3 overall vegetation.3 10.4 0 0 0
## 4 overall vegetation.4 17.8 0 0 0
## 5 overall vegetation.5 13.0 0 0 0
## 6 overall vegetation.6 14.3 0 0 0
## 7 overall vegetation.7 20.0 0 0 0
## 8 overall vegetation.8 14.0 0 0 0
## 9 overall vegetation.9 18.0 0.846 0.0470 0.834
## 10 overall vegetation.10 20.0 0 0 0
## # … with 22 more rows
mean(table$relative_held) with the correct table name)?mean <- mean(repr_data$relative_held)
perc <- round(mean * 100, digits = 2)
On average, 24.15% of features are held in protected areas
sum(table$relative_held >= target_value) with the correct table name)?target_value = .10
sum <- sum(repr_data$relative_held >= target_value)
With a target of 10% coverage by protected areas, 15 features do not meet this target threshold.
##12. If we set a target of 20% coverage by protected areas, how many features fail to meet this target?
target_value = .20
sum <- sum(repr_data$relative_held >= target_value)
With a target of 20% coverage by protected areas, 14 features do not meet this target threshold.
##13. Is there a relationship between the total abundance of a feature and how well it is represented by protected areas (hint: plot(abundance_data$absolute_abundance ~ repr_data$relative_held))?
plot(abundance_data$absolute_abundance ~ repr_data$relative_held)
It seems like features that are more abundant typically are less represented by protected areas. This is what we might expect since protected areas usually prioritize rare or endangered species. Species with high total abundance values likely will not need to be protected.
develop prioritizations to identify priority areas for protected area establishment
prioritizr is a decision support tool (similar to Marxan and Zonation)
cost column to specify acquisition costsAlthough we strongly recommend using Gurobi to solve problems (with add_gurobi_solver), we will use the lpsymphony solver in this workshop since it is easier to install
# print planning unit data
print(pu_data)
## class : SpatialPolygonsDataFrame
## features : 516
## extent : 348703.2, 611932.4, 5167775, 5344516 (xmin, xmax, ymin, ymax)
## crs : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
## variables : 6
## names : id, cost, status, locked_in, locked_out, pa_status
## min values : 557, 3.59717531470679, 0, 0, 0, 0
## max values : 1130, 47.238336402701, 2, 1, 1, 1
# make prioritization problem
p1_rds <- file.path(dir_data, "p1.rds")
if (!file.exists(p1_rds)){
p1 <- problem(pu_data, veg_data, cost_column = "cost") %>%
add_min_set_objective() %>%
add_relative_targets(0.05) %>% # 5% representation targets
add_binary_decisions() %>%
add_lpsymphony_solver()
saveRDS(p1, p1_rds)
}
p1 <- readRDS(p1_rds)
# print problem
print(p1)
# solve problem
s1 <- solve(p1)
# print solution, the solution_1 column contains the solution values
# indicating if a planning unit is (1) selected or (0) not
print(s1)
## class : SpatialPolygonsDataFrame
## features : 516
## extent : 348703.2, 611932.4, 5167775, 5344516 (xmin, xmax, ymin, ymax)
## crs : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
## variables : 7
## names : id, cost, status, locked_in, locked_out, pa_status, solution_1
## min values : 557, 3.59717531470679, 0, 0, 0, 0, 0
## max values : 1130, 47.238336402701, 2, 1, 1, 1, 1
# calculate number of planning units selected in the prioritization
eval_n_summary(p1, s1[, "solution_1"])
## # A tibble: 1 × 2
## summary cost
## <chr> <dbl>
## 1 overall 15
# calculate total cost of the prioritization
eval_cost_summary(p1, s1[, "solution_1"])
## # A tibble: 1 × 2
## summary cost
## <chr> <dbl>
## 1 overall 385.
# plot solution
# selected = green, not selected = grey
spplot(s1, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s1",
colorkey = FALSE)
##14. How many planing units were selected in the prioritization? What proportion of planning units were selected in the prioritization?
n <- 15
N <- nrow(pu_data)
prop <- n / N
perc <- round(prop * 100, digits = 2)
The prioritization selected 15 planning units, which is 2.91 of the 516 total planning units.
##15. Is there a pattern in the spatial distribution of the priority areas?
There is no clear pattern in the spatial distribution of the priority areas. They look scattered and random.
##16. Can you verify that all of the targets were met in the prioritization (hint: eval_feature_representation_summary(p1, s1[, "solution_1"]))?
eval_feature_representation_summary(p1, s1[, "solution_1"])
## # A tibble: 32 × 5
## summary feature total_amount absolute_held relative_held
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 overall vegetation.1 16.0 2.90 0.181
## 2 overall vegetation.2 14.3 1 0.0699
## 3 overall vegetation.3 10.4 1 0.0963
## 4 overall vegetation.4 17.8 3 0.168
## 5 overall vegetation.5 13.0 1 0.0769
## 6 overall vegetation.6 14.3 1.94 0.136
## 7 overall vegetation.7 20.0 1.75 0.0875
## 8 overall vegetation.8 14.0 2.80 0.200
## 9 overall vegetation.9 18.0 2.16 0.120
## 10 overall vegetation.10 20.0 2 0.0999
## # … with 22 more rows
pu_da) already contains this information in the locked_in column, we can use this column name to specify which planning units should be locked in# plot locked_in data
# TRUE = blue, FALSE = grey
spplot(pu_data, "locked_in", col.regions = c("grey80", "darkblue"),
main = "locked_in", colorkey = FALSE)
# make prioritization problem
p2_rds <- file.path(dir_data, "p2.rds")
redo <- FALSE
if (!file.exists(p2_rds) | redo){
p2 <- problem(pu_data, veg_data, cost_column = "cost") %>%
add_min_set_objective() %>%
add_relative_targets(0.05) %>%
add_locked_in_constraints("locked_in") %>%
add_binary_decisions() %>%
add_lpsymphony_solver()
saveRDS(p2, p2_rds)
}
p2 <- readRDS(p2_rds)
# print problem
print(p2)
# solve problem
s2 <- solve(p2)
# plot solution
# selected = green, not selected = grey
spplot(s2, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s2",
colorkey = FALSE)
Let’s pretend that we talked to an expert on the vegetation communities in our study system and they recommended that a 10% target was needed for each vegetation class. So, equipped with this information, let’s set the targets to 10%.
# make prioritization problem
p3_rds <- file.path(dir_data, "p3.rds")
if (!file.exists(p3_rds)){
p3 <- problem(pu_data, veg_data, cost_column = "cost") %>%
add_min_set_objective() %>%
add_relative_targets(0.1) %>%
add_locked_in_constraints("locked_in") %>%
add_binary_decisions() %>%
add_lpsymphony_solver()
saveRDS(p3, p3_rds)
}
p3 <- readRDS(p3_rds)
# print problem
print(p3)
# solve problem
s3 <- solve(p3)
# plot solution
# selected = green, not selected = grey
spplot(s3, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s3",
colorkey = FALSE)
Similar to before, this information is present in our planning unit data so we can use the locked_out column name to achieve this.
# plot locked_out data
# TRUE = red, FALSE = grey
spplot(pu_data, "locked_out", col.regions = c("grey80", "darkred"),
main = "locked_out", colorkey = FALSE)
# make prioritization problem
p4_rds <- file.path(dir_data, "p4.rds")
if (!file.exists(p4_rds)){
p4 <- problem(pu_data, veg_data, cost_column = "cost") %>%
add_min_set_objective() %>%
add_relative_targets(0.1) %>%
add_locked_in_constraints("locked_in") %>%
add_locked_out_constraints("locked_out") %>%
add_binary_decisions() %>%
add_lpsymphony_solver()
saveRDS(p4, p4_rds)
}
p4 <- readRDS(p4_rds)
# print problem
print(p4)
# solve problem
s4 <- solve(p4)
# plot solution
# selected = green, not selected = grey
spplot(s4, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s4",
colorkey = FALSE)
##17. What is the cost of the planning units selected in s2, s3, and s4?
# print solution, the solution_1 column contains the solution values - indicating if a planning unit is (1) selected or (0) not
print(s2)
## class : SpatialPolygonsDataFrame
## features : 516
## extent : 348703.2, 611932.4, 5167775, 5344516 (xmin, xmax, ymin, ymax)
## crs : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
## variables : 7
## names : id, cost, status, locked_in, locked_out, pa_status, solution_1
## min values : 557, 3.59717531470679, 0, 0, 0, 0, 0
## max values : 1130, 47.238336402701, 2, 1, 1, 1, 1
# calculate total cost of the prioritization
s2_cost <- eval_cost_summary(p2, s2[, "solution_1"])
s3_cost <- eval_cost_summary(p3, s3[, "solution_1"])
s4_cost <- eval_cost_summary(p4, s4[, "solution_1"])
The costs associated with s2, s3, and s4 are 6600.0886758, 6669.9087144, and 6711.581848 respectively.
##18. How many planning units are in s2, s3, and s4?
# calculate number of planning units selected in the prioritization
s2_n <- eval_n_summary(p2, s2[, "solution_1"])
s3_n <- eval_n_summary(p3, s3[, "solution_1"])
s4_n <- eval_n_summary(p4, s4[, "solution_1"])
There are 205 planning units in s2, 211 planning units in s3, and 212 planning units in s4.
##19. Do the solutions with more planning units have a greater cost? Why (or why not)?
Yes, s4 has the highest cost (6711.581848) with its overall, 212 planning units. Because solution 4 has the most constraints, it needs the most planning units to meet its 10% threshold. More planning units means there is a higher cost associated.
##20. Why does the first solution (s1) cost less than the second solution with protected areas locked into the solution (s2)?
The first solution costs less than the second solution because in the second solution, we added the complexity of locking in existing protected areas. Thus, s2 has to select planning units from the remaining higher-cost area. The existing protected area likely covers low-cost planning units.
##21. Why does the third solution (s3) cost less than the fourth solution solution with highly degraded areas locked out (s4)?
plans for protected area systems should promote connectivity
the prioritizations we have made so far have been highly fragmented
we can add penalties to our conservation planning problem to penalize fragmentation
these penalties specify a trade-off between the primary objective (here, solution cost) and fragmentation (i.e. total exposed boundary length) using a penalty value
if we set the penalty value too low, then we will end up with a solution that is nearly identical to the previous solution
if we set the penalty value too high, then prioritizr will (1) take a long time to solve the problem and (2) we will end up with a solution that contains lots of extra planning units that are not needed.
the minimizing fragmentation is considered so much more important than solution cost that the optimal solution is simply to select as many planning units as possible
we generally want penalty values between 0.00001 and 0.01
ffinding a useful penalty value requires calibration
the “correct” penalty value depends on the size of the planning units, the main objective values (e.g. cost values), and the effect of fragmentation on biodiversity persistence.
Let’s create a new problem that is similar to our previous problem (p4)—except that it contains boundary length penalties—and solve it. Since our planning unit data is in a spatial format (i.e. vector or raster data), prioritizr can automatically calculate the boundary data for us.
# make prioritization problem
p5_rds <- file.path(dir_data, "p5.rds")
if (!file.exists(p5_rds)){
p5 <- problem(pu_data, veg_data, cost_column = "cost") %>%
add_min_set_objective() %>%
add_boundary_penalties(penalty = 0.001) %>%
add_relative_targets(0.1) %>%
add_locked_in_constraints("locked_in") %>%
add_locked_out_constraints("locked_out") %>%
add_binary_decisions() %>%
add_lpsymphony_solver()
saveRDS(p5, p5_rds)
}
p5 <- readRDS(p5_rds)
# print problem
print(p5)
# solve problem,
# note this will take a bit longer than the previous runs
s5 <- solve(p5)
# print solution
print(s5)
## class : SpatialPolygonsDataFrame
## features : 516
## extent : 348703.2, 611932.4, 5167775, 5344516 (xmin, xmax, ymin, ymax)
## crs : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
## variables : 7
## names : id, cost, status, locked_in, locked_out, pa_status, solution_1
## min values : 557, 3.59717531470679, 0, 0, 0, 0, 0
## max values : 1130, 47.238336402701, 2, 1, 1, 1, 1
# plot solution
# selected = green, not selected = grey
spplot(s5, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s5",
colorkey = FALSE)
Now let’s compare the solutions to the problems with (s5) and without (s4) the boundary length penalties.
##22. What is the cost the fourth (s4) and fifth (s5) solutions? Why does the fifth solution (s5) cost more than the fourth (s4) solution?
# calculate total cost of the prioritization
s4_cost <- eval_cost_summary(p4, s4[, "solution_1"])
s5_cost <- eval_cost_summary(p5, s5[, "solution_1"])
The fourth solution costs 6711.581848, and the fifth solution costs 6747.585313. The fifth solution costs more because of the boundary penalty we placed on it. The boundary penalty forced the solution to clump planning units together even if those planning units were relatively high cost.
##23. Try setting the penalty value to 0.000000001 (i.e. 1e-9) instead of 0.001. What is the cost of the solution now? Is it different from the fourth solution (s4) (hint: try plotting the solutions to visualize them)? Is this is a useful penalty value? Why (or why not)?
# make prioritization problem
p6_rds <- file.path(dir_data, "p6.rds")
if (!file.exists(p6_rds)){
p6 <- problem(pu_data, veg_data, cost_column = "cost") %>%
add_min_set_objective() %>%
add_boundary_penalties(penalty = 0.000000001) %>%
add_relative_targets(0.1) %>%
add_locked_in_constraints("locked_in") %>%
add_locked_out_constraints("locked_out") %>%
add_binary_decisions() %>%
add_lpsymphony_solver()
saveRDS(p6, p6_rds)
}
p6 <- readRDS(p6_rds)
# print problem
print(p6)
# solve problem,
# note this will take a bit longer than the previous runs
s6 <- solve(p6)
# print solution
print(s6)
## class : SpatialPolygonsDataFrame
## features : 516
## extent : 348703.2, 611932.4, 5167775, 5344516 (xmin, xmax, ymin, ymax)
## crs : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
## variables : 7
## names : id, cost, status, locked_in, locked_out, pa_status, solution_1
## min values : 557, 3.59717531470679, 0, 0, 0, 0, 0
## max values : 1130, 47.238336402701, 2, 1, 1, 1, 1
# plot solution
# selected = green, not selected = grey
spplot(s6, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s6",
colorkey = FALSE)
s6_cost <- eval_cost_summary(p6, s6[, "solution_1"])
The new solution with a very low penalty cost is 6711.7559287, which is very similar to the solution 4 cost of 6711.581848. The two maps also look identical, with lots of fragments in the eastern side of Tasmania. Since the two solutions look very similar, this was not a helpful penalty value.
##24. Try setting the penalty value to 0.5. What is the cost of the solution now? Is it different from the fourth solution (s4) (hint: try plotting the solutions to visualize them)? Is this a useful penalty value? Why (or why not)?
p7_rds <- file.path(dir_data, "p7.rds")
if (!file.exists(p7_rds)){
p7 <- problem(pu_data, veg_data, cost_column = "cost") %>%
add_min_set_objective() %>%
add_boundary_penalties(penalty = 0.5) %>%
add_relative_targets(0.1) %>%
add_locked_in_constraints("locked_in") %>%
add_locked_out_constraints("locked_out") %>%
add_binary_decisions() %>%
add_lpsymphony_solver()
saveRDS(p7, p7_rds)
}
p7 <- readRDS(p7_rds)
# print problem
print(p7)
# solve problem,
# note this will take a bit longer than the previous runs
s7 <- solve(p7)
# print solution
print(s7)
## class : SpatialPolygonsDataFrame
## features : 516
## extent : 348703.2, 611932.4, 5167775, 5344516 (xmin, xmax, ymin, ymax)
## crs : +proj=utm +zone=55 +south +datum=WGS84 +units=m +no_defs
## variables : 7
## names : id, cost, status, locked_in, locked_out, pa_status, solution_1
## min values : 557, 3.59717531470679, 0, 0, 0, 0, 0
## max values : 1130, 47.238336402701, 2, 1, 1, 1, 1
# plot solution
# selected = green, not selected = grey
spplot(s7, "solution_1", col.regions = c("grey80", "darkgreen"), main = "s7",
colorkey = FALSE)
s7_cost <- eval_cost_summary(p7, s7[, "solution_1"])
The final solution has a cost of 9816.639992, which is much higher than the s4 cost of 6711.581848. This is because the extremely high penalty cost forced the solution to lump every single planning cell together, even extremely high cost cells. This is the opposite end of the spectrum of s6 and still too extreme to be useful for conservation planning.